package my.news;

import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
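 * Selects the documents whose text contains at least a given number of
 * matches of a word (the word is compiled as a regular expression).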
 * @author yura
 *
 */
public class SelectWithWordProcessor implements DocumentSetProcessor {
    /** Regular expression searched for in each document's data. */
    public String mainWord;

    /** Minimum number of matches a document needs in order to be kept. */
    public int count;

    /**
     * Creates a processor that keeps documents containing enough matches.
     *
     * @param mainWord regular expression to search for
     * @param count    minimum number of matches required to keep a document
     */
    public SelectWithWordProcessor(String mainWord, int count) {
        this.mainWord = mainWord;
        this.count = count;
    }

    /**
     * Returns a new set holding only the documents whose data contains at
     * least {@link #count} matches of {@link #mainWord}. The given set is
     * not modified. When {@code count} is zero or negative every document
     * is kept.
     *
     * @param contents documents to filter
     * @return a fresh {@link HashSet} with the documents that qualified
     */
    public Set<Content> process(Set<Content> contents) {
        // Compiled on every call because mainWord is a public, mutable field.
        Pattern wordPattern = Pattern.compile(mainWord);

        // Start from a full copy and drop the documents that fall short.
        Set<Content> selected = new HashSet<Content>(contents);
        for (Content document : contents) {
            Matcher matcher = wordPattern.matcher(document.data);
            int found = countMatchesUpTo(matcher, count);
            if (found < count) {
                selected.remove(document);
            }
        }
        return selected;
    }

    /**
     * Counts successive matches, stopping as soon as {@code limit} matches
     * have been seen or the input is exhausted.
     *
     * @param matcher matcher positioned at the start of the text
     * @param limit   number of matches after which counting stops
     * @return the number of matches counted, never more than {@code limit}
     *         when {@code limit} is positive, otherwise zero
     */
    private static int countMatchesUpTo(Matcher matcher, int limit) {
        int found = 0;
        while (matcher.find()) {
            if (found >= limit) {
                break;
            }
            found++;
        }
        return found;
    }

}
